refactor the restage runner
authorJoey Hess <joeyh@joeyh.name>
Fri, 23 Sep 2022 17:10:49 +0000 (13:10 -0400)
committerJoey Hess <joeyh@joeyh.name>
Fri, 23 Sep 2022 17:12:17 +0000 (13:12 -0400)
Sponsored-by: Dartmouth College's DANDI project
Annex/Link.hs
Logs/Restage.hs [new file with mode: 0644]

index 9cc39ab81d94976278b1cfc97c8c1784e689f4e9..c1d15d411e9c8bf88a6ea0c59944bce37eb1a52d 100644 (file)
@@ -177,7 +177,7 @@ newtype Restage = Restage Bool
  - gets to look at it.
  -}
 restagePointerFile :: Restage -> RawFilePath -> InodeCache -> Annex ()
-restagePointerFile (Restage False) f _ =
+restagePointerFile (Restage False) f orig = do
        toplevelWarning True $ unableToRestage $ Just $ fromRawFilePath f
 restagePointerFile (Restage True) f orig = withTSDelta $ \tsd ->
        -- Avoid refreshing the index if run by the
@@ -190,57 +190,58 @@ restagePointerFile (Restage True) f orig = withTSDelta $ \tsd ->
                -- fails on "../../repo/path/file" when cwd is not in the repo 
                -- being acted on. Avoid these problems with an absolute path.
                absf <- liftIO $ absPath f
-               Annex.Queue.addFlushAction runner [(absf, isunmodified tsd, inodeCacheFileSize orig)]
+               Annex.Queue.addFlushAction restagePointerFileRunner
+                       [(absf, isunmodified tsd, inodeCacheFileSize orig)]
   where
        isunmodified tsd = genInodeCache f tsd >>= return . \case
                Nothing -> False
                Just new -> compareStrong orig new
 
-       -- Other changes to the files may have been staged before this
-       -- gets a chance to run. To avoid a race with any staging of
-       -- changes, first lock the index file. Then run git update-index
-       -- on all still-unmodified files, using a copy of the index file,
-       -- to bypass the lock. Then replace the old index file with the new
-       -- updated index file.
-       runner :: Git.Queue.FlushActionRunner Annex
-       runner = Git.Queue.FlushActionRunner "restagePointerFile" $ \r l -> do
-               -- Flush any queued changes to the keys database, so they
-               -- are visible to child processes.
-               -- The database is closed because that may improve behavior
-               -- when run in Windows's WSL1, which has issues with
-               -- multiple writers to SQL databases.
-               liftIO . Database.Keys.Handle.closeDbHandle
-                       =<< Annex.getRead Annex.keysdbhandle
-               realindex <- liftIO $ Git.Index.currentIndexFile r
-               let lock = fromRawFilePath (Git.Index.indexFileLock realindex)
-                   lockindex = liftIO $ catchMaybeIO $ Git.LockFile.openLock' lock
-                   unlockindex = liftIO . maybe noop Git.LockFile.closeLock
-                   showwarning = warning $ unableToRestage Nothing
-                   go Nothing = showwarning
-                   go (Just _) = withTmpDirIn (fromRawFilePath $ Git.localGitDir r) "annexindex" $ \tmpdir -> do
-                       let tmpindex = toRawFilePath (tmpdir </> "index")
-                       let updatetmpindex = do
-                               r' <- liftIO $ Git.Env.addGitEnv r Git.Index.indexEnv
-                                       =<< Git.Index.indexEnvVal tmpindex
-                               -- Avoid git warning about CRLF munging.
-                               let r'' = r' { gitGlobalOpts = gitGlobalOpts r' ++
-                                       [ Param "-c"
-                                       , Param $ "core.safecrlf=" ++ boolConfig False
-                                       ] }
-                               configfilterprocess l $ runsGitAnnexChildProcessViaGit' r'' $ \r''' ->
-                                       liftIO $ Git.UpdateIndex.refreshIndex r''' $ \feed ->
-                                               forM_ l $ \(f', checkunmodified, _) ->
-                                                       whenM checkunmodified $
-                                                               feed f'
-                       let replaceindex = catchBoolIO $ do
-                               moveFile tmpindex realindex
-                               return True
-                       ok <- liftIO (createLinkOrCopy realindex tmpindex)
-                               <&&> updatetmpindex
-                               <&&> liftIO replaceindex
-                       unless ok showwarning
-               bracket lockindex unlockindex go
-       
+-- Other changes to the files may have been staged before this
+-- gets a chance to run. To avoid a race with any staging of
+-- changes, first lock the index file. Then run git update-index
+-- on all still-unmodified files, using a copy of the index file,
+-- to bypass the lock. Then replace the old index file with the new
+-- updated index file.
+restagePointerFileRunner :: Git.Queue.FlushActionRunner Annex
+restagePointerFileRunner = Git.Queue.FlushActionRunner "restagePointerFile" $ \r l -> do
+       -- Flush any queued changes to the keys database, so they
+       -- are visible to child processes.
+       -- The database is closed because that may improve behavior
+       -- when run in Windows's WSL1, which has issues with
+       -- multiple writers to SQL databases.
+       liftIO . Database.Keys.Handle.closeDbHandle
+               =<< Annex.getRead Annex.keysdbhandle
+       realindex <- liftIO $ Git.Index.currentIndexFile r
+       let lock = fromRawFilePath (Git.Index.indexFileLock realindex)
+           lockindex = liftIO $ catchMaybeIO $ Git.LockFile.openLock' lock
+           unlockindex = liftIO . maybe noop Git.LockFile.closeLock
+           showwarning = warning $ unableToRestage Nothing
+           go Nothing = showwarning
+           go (Just _) = withTmpDirIn (fromRawFilePath $ Git.localGitDir r) "annexindex" $ \tmpdir -> do
+               let tmpindex = toRawFilePath (tmpdir </> "index")
+               let updatetmpindex = do
+                       r' <- liftIO $ Git.Env.addGitEnv r Git.Index.indexEnv
+                               =<< Git.Index.indexEnvVal tmpindex
+                       -- Avoid git warning about CRLF munging.
+                       let r'' = r' { gitGlobalOpts = gitGlobalOpts r' ++
+                               [ Param "-c"
+                               , Param $ "core.safecrlf=" ++ boolConfig False
+                               ] }
+                       configfilterprocess l $ runsGitAnnexChildProcessViaGit' r'' $ \r''' ->
+                               liftIO $ Git.UpdateIndex.refreshIndex r''' $ \feed ->
+                                       forM_ l $ \(f', checkunmodified, _) ->
+                                               whenM checkunmodified $
+                                                       feed f'
+               let replaceindex = catchBoolIO $ do
+                       moveFile tmpindex realindex
+                       return True
+               ok <- liftIO (createLinkOrCopy realindex tmpindex)
+                       <&&> updatetmpindex
+                       <&&> liftIO replaceindex
+               unless ok showwarning
+       bracket lockindex unlockindex go
+  where
        {- filter.annex.process configured to use git-annex filter-process
         - is sometimes faster and sometimes slower than using
         - git-annex smudge. The latter is run once per file, while
diff --git a/Logs/Restage.hs b/Logs/Restage.hs
new file mode 100644 (file)
index 0000000..75bba85
--- /dev/null
@@ -0,0 +1,51 @@
+{- git-annex restage log file
+ -
+ - Copyright 2022 Joey Hess <id@joeyh.name>
+ -
+ - Licensed under the GNU AGPL version 3 or higher.
+ -}
+
+{-# LANGUAGE OverloadedStrings #-}
+
+module Logs.Restage where
+
+import Annex.Common
+import Git.FilePath
+import Logs.File
+
+import qualified Data.ByteString.Lazy as L
+
+-- | Log a file whose pointer needs to be restaged in git.
+-- The content of the file may not be a pointer, if it is populated with
+-- the annex content. The InodeCache is used to verify that the file
+-- still contains the content, and it's still safe to restage its pointer.
+writeRestageLog :: TopFilePath -> InodeCache -> Annex ()
+writeRestageLog f ic = do
+       logf <- fromRepo gitAnnexRestageLog
+       lckf <- fromRepo gitAnnexRestageLock
+       appendLogFile logf lckf $ L.fromStrict $
+               encodeBS (showInodeCache ic) <> ":" <> getTopFilePath f
+
+-- | Streams the content of the restage log, and then empties the log at
+-- the end.
+--
+-- If the action is interrupted or throws an exception, the log file is
+-- left unchanged.
+--
+-- Locking is used to prevent new items being added to the log while this
+-- is running.
+streamSmudged :: (TopFilePath -> InodeCache -> Annex ()) -> Annex ()
+streamSmudged a = do
+       logf <- fromRepo gitAnnexRestageLog
+       lckf <- fromRepo gitAnnexRestageLock
+       streamLogFile (fromRawFilePath logf) lckf $ \l -> 
+               case parse l of
+                       Nothing -> noop
+                       Just (k, f) -> a f ic
+  where
+       parse l = 
+               let (ics, f) = separate (== ':') l
+               in do
+                       ic <- readInodeCache ics
+                       return (asTopFilePath (toRawFilePath f), ic)
+